Libraries

## -- Attaching packages --------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1     v purrr   0.3.2
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   1.0.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts ------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## Registered S3 method overwritten by 'seriation':
##   method         from 
##   reorder.hclust gclus
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis

Data

This file is based on the “DataCleaning_Seine_2019” R Markdown script. Extracted relevant salmon abundances (with zeros where we seined but caught no salmon), environmental information (from YSI meter), habitat information (i.e. shoot/flowering shoot density/m2), and otter density by site.

Exploratory Analysis (EDA)

# Take a look at data
glimpse(dat)
## Observations: 42
## Variables: 30
## $ X                              <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, ...
## $ site                           <fct> Salt Lake Bay, Nossuk Bay, Chus...
## $ n_surv1                        <int> 90, 28, 60, 104, 21, 108, 65, 1...
## $ n_surv2                        <int> 48, 50, 50, 82, 31, 132, 27, 5,...
## $ dens_surv1                     <dbl> 9.9447514, 4.3495146, 5.4533061...
## $ dens_surv2                     <dbl> 5.30386740, 7.76699029, 4.54442...
## $ avg_density                    <dbl> 7.62430939, 6.05825243, 4.99886...
## $ sd_density                     <dbl> 3.28160053, 2.41652026, 0.64267...
## $ year                           <int> 2017, 2017, 2017, 2017, 2017, 2...
## $ dissolved_02_mg.l_surface      <dbl> 12.80, 10.58, 8.68, 9.55, 11.62...
## $ dissolved_02_percent_surface   <dbl> NA, 115.6, 98.8, 104.5, 135.3, ...
## $ specific_conductivity_surface  <dbl> NA, 47004.0, 4549.4, 44498.0, 4...
## $ salinity_ppt_surface           <dbl> 6.0, 30.4, 29.4, 28.9, 29.0, 31...
## $ temperature_C_surface          <dbl> 8.0, 10.9, 12.9, 11.7, 14.1, 12...
## $ dissolved_02_mg.l_transect     <dbl> 13.30, 10.72, 8.95, 10.03, 10.7...
## $ dissolved_02_percent_transect  <dbl> NA, 116.60, 101.40, 108.20, 124...
## $ specific_conductivity_transect <int> NA, 47741, 45527, 47365, 45655,...
## $ salinity_ppt_transect          <dbl> 28.0, 30.9, 29.5, 30.6, 29.7, 3...
## $ temperature_C_transect         <dbl> 8.0, 10.3, 12.8, 11.0, 13.7, 11...
## $ avg_shoot                      <dbl> 226.5, 245.0, 220.5, 180.5, 205...
## $ sd_shoot                       <dbl> 40.22437, 57.55991, 71.85501, 4...
## $ avg_flowering                  <dbl> 0.0, 0.0, 6.0, 0.0, 2.5, 2.0, 1...
## $ sd_flowering                   <dbl> 0.000000, 0.000000, 3.703280, 0...
## $ date                           <fct> 2017-05-12, 2017-05-15, 2017-06...
## $ juli_date                      <int> 132, 135, 164, 189, 220, 221, 2...
## $ SALCHIN                        <int> 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ SALCHUM                        <int> 46, 75, 1, 0, 0, 1, 0, 56, 0, 6...
## $ SALCOHO                        <int> 0, 4, 0, 0, 0, 1, 0, 6, 0, 0, 0...
## $ SALPINK                        <int> 10, 8, 0, 0, 0, 0, 0, 2, 0, 2, ...
## $ SALSOCK                        <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
# do some minor clean up 
dat <- dat[-42,-1]
dat$year <- as.factor(dat$year)
dat[,25:29] <- data.frame(lapply(dat[,25:29], as.numeric), stringsAsFactors = FALSE)
dat <- dat %>%
  rowwise() %>%
  mutate(abundance = sum(SALCHIN, SALCHUM, SALCOHO, SALPINK, SALSOCK))
# visualize relationships among variables: scatterplot matricies
corrgram(dat[,c(4,5,8,9,12:14,17:19,21,25:29)], lower.panel=panel.shade, upper.panel=panel.ellipse,
         diag.panel=panel.density)

pairs(dat[,c(5,6,9,10,13:15,18:20,22,25:30)], lower.panel = panel.smooth)

# graph abundance data 
dat$site <- reorder(dat$site, -dat$avg_density)
dat %>%
  group_by(site, avg_density, year) %>%
  summarise(abundance = as.numeric(sum(SALCHIN, SALCHUM, SALCOHO, SALPINK, SALSOCK))) %>%
  ggplot() + geom_bar(aes(x = site, y = abundance, fill = year), stat = "identity", position = "dodge") + theme(axis.text.x = element_text(angle = 90))
## Warning: Grouping rowwise data frame strips rowwise nature

# look at the sites against the average sea otter density
ggplot(data = dat) + geom_bar(aes(x = site, y = avg_density, fill = year), stat = "identity") + theme(axis.text.x = element_text(angle = 90))
## Warning: Removed 1 rows containing missing values (position_stack).

# visualize distribution of salmon abundance
hist(x = dat$abundance)

boxplot(dat$abundance)

# distribution of abundances
# Can see that there are a lot of zeros and a few low, some mid, and only a few high abundances
plot(table(dat$abundance))

Models

Binomial models

  1. Predictor variables: avg_density, juli_date,
  2. Binomial family (salmon is present or not)
  3. Logit link (because its the binomial family) ### Graphical exploration
hist(as.numeric(dat$abundance > 0))

dat$year <- as.factor(dat$year)
summary(dat)
##             site       n_surv1          n_surv2        dens_surv1    
##  Kaguk Cove   : 2   Min.   :  0.00   Min.   :  0.0   Min.   : 0.000  
##  Guktu Bay    : 2   1st Qu.:  1.00   1st Qu.:  1.0   1st Qu.: 0.113  
##  Salt Lake Bay: 2   Median : 18.00   Median : 14.0   Median : 1.221  
##  Chusini Cove : 2   Mean   : 30.22   Mean   : 36.4   Mean   : 3.127  
##  Shinaku Inlet: 2   3rd Qu.: 60.00   3rd Qu.: 48.0   3rd Qu.: 5.453  
##  Nossuk Bay   : 2   Max.   :108.00   Max.   :278.0   Max.   :15.465  
##  (Other)      :29                    NA's   :1                       
##    dens_surv2       avg_density         sd_density         year   
##  Min.   : 0.0000   Min.   : 0.00000   Min.   : 0.00000   2017:21  
##  1st Qu.: 0.1011   1st Qu.: 0.09921   1st Qu.: 0.07406   2019:20  
##  Median : 0.6984   Median : 0.95503   Median : 0.19105            
##  Mean   : 4.1787   Mean   : 3.62229   Mean   : 1.62919            
##  3rd Qu.: 5.3163   3rd Qu.: 4.90825   3rd Qu.: 1.95997            
##  Max.   :39.8513   Max.   :26.84015   Max.   :18.40055            
##  NA's   :1         NA's   :1          NA's   :1                   
##  dissolved_02_mg.l_surface dissolved_02_percent_surface
##  Min.   : 8.240            Min.   : 90.7               
##  1st Qu.: 9.578            1st Qu.:104.2               
##  Median :10.185            Median :112.2               
##  Mean   :10.400            Mean   :114.6               
##  3rd Qu.:10.950            3rd Qu.:121.2               
##  Max.   :13.030            Max.   :146.7               
##  NA's   :1                 NA's   :3                   
##  specific_conductivity_surface salinity_ppt_surface temperature_C_surface
##  Min.   : 4549                 Min.   : 3.80        Min.   : 8.00        
##  1st Qu.:44876                 1st Qu.:28.98        1st Qu.:11.30        
##  Median :47208                 Median :30.40        Median :12.45        
##  Mean   :43771                 Mean   :27.80        Mean   :12.11        
##  3rd Qu.:48976                 3rd Qu.:31.60        3rd Qu.:12.82        
##  Max.   :64325                 Max.   :32.60        Max.   :14.70        
##  NA's   :3                     NA's   :1            NA's   :1            
##  dissolved_02_mg.l_transect dissolved_02_percent_transect
##  Min.   :  6.520            Min.   :  9.87               
##  1st Qu.:  9.453            1st Qu.:102.33               
##  Median : 10.085            Median :112.35               
##  Mean   : 12.577            Mean   :108.23               
##  3rd Qu.: 11.000            3rd Qu.:119.47               
##  Max.   :105.000            Max.   :150.10               
##  NA's   :1                  NA's   :3                    
##  specific_conductivity_transect salinity_ppt_transect
##  Min.   :38426                  Min.   :24.30        
##  1st Qu.:46075                  1st Qu.:29.77        
##  Median :47603                  Median :30.80        
##  Mean   :47332                  Mean   :30.62        
##  3rd Qu.:49095                  3rd Qu.:31.73        
##  Max.   :50436                  Max.   :32.90        
##  NA's   :3                      NA's   :1            
##  temperature_C_transect   avg_shoot         sd_shoot      avg_flowering  
##  Min.   : 8.00          Min.   :  93.0   Min.   : 29.45   Min.   :0.000  
##  1st Qu.:11.10          1st Qu.: 220.5   1st Qu.: 48.73   1st Qu.:0.000  
##  Median :12.25          Median : 283.0   Median : 69.69   Median :0.500  
##  Mean   :11.97          Mean   : 337.3   Mean   : 82.85   Mean   :1.522  
##  3rd Qu.:12.72          3rd Qu.: 369.0   3rd Qu.: 87.29   3rd Qu.:2.500  
##  Max.   :16.60          Max.   :1291.2   Max.   :287.32   Max.   :8.000  
##  NA's   :1                                                               
##   sd_flowering           date      juli_date        SALCHIN       
##  Min.   :0.000   2017-05-10: 1   Min.   :109.0   Min.   :0.00000  
##  1st Qu.:0.000   2017-05-11: 1   1st Qu.:131.0   1st Qu.:0.00000  
##  Median :1.414   2017-05-12: 1   Median :142.0   Median :0.00000  
##  Mean   :2.059   2017-05-13: 1   Mean   :159.2   Mean   :0.04878  
##  3rd Qu.:3.578   2017-05-14: 1   3rd Qu.:169.0   3rd Qu.:0.00000  
##  Max.   :8.485   2017-05-15: 1   Max.   :238.0   Max.   :2.00000  
##                  (Other)   :35                                    
##     SALCHUM          SALCOHO          SALPINK          SALSOCK       
##  Min.   :  0.00   Min.   : 0.000   Min.   :  0.00   Min.   : 0.0000  
##  1st Qu.:  0.00   1st Qu.: 0.000   1st Qu.:  0.00   1st Qu.: 0.0000  
##  Median :  2.00   Median : 0.000   Median :  0.00   Median : 0.0000  
##  Mean   : 15.02   Mean   : 2.537   Mean   : 12.95   Mean   : 0.7317  
##  3rd Qu.: 15.00   3rd Qu.: 1.000   3rd Qu.:  2.00   3rd Qu.: 0.0000  
##  Max.   :128.00   Max.   :27.000   Max.   :287.00   Max.   :27.0000  
##                                                                      
##    abundance     
##  Min.   :  0.00  
##  1st Qu.:  0.00  
##  Median :  4.00  
##  Mean   : 31.29  
##  3rd Qu.: 31.00  
##  Max.   :345.00  
## 
plot((abundance > 0) ~ avg_density, data = dat)

plot((abundance >0) ~ avg_shoot, data = dat)

plot((abundance >0) ~ avg_flowering, data = dat)

plot((abundance >0) ~ juli_date, data = dat)

# hard to see any real patterns of presence of salmon with otter density or shoot density, flowering density
# or julian date

table((dat$abundance >0), dat$year)
##        
##         2017 2019
##   FALSE   10    3
##   TRUE    11   17
# there are more sites with salmon than without in 2019, likely due to the time of sampling
# makes sense that 2017 has about half and half due to the even spread of sampling across the summer. 
table((dat$abundance >0), dat$juli_date)
##        
##         109 113 114 125 126 127 128 129 130 131 132 133 134 135 137 138
##   FALSE   1   1   0   0   0   0   0   0   0   0   0   1   0   0   0   0
##   TRUE    0   0   1   1   1   1   1   1   2   1   1   0   1   1   1   1
##        
##         139 140 141 142 157 158 159 161 162 163 164 166 167 169 188 189
##   FALSE   0   0   0   0   0   0   0   0   0   1   0   0   1   0   0   1
##   TRUE    1   1   1   1   1   1   1   1   1   0   1   1   0   1   1   0
##        
##         218 219 220 221 222 223 231 238
##   FALSE   1   1   1   0   1   1   1   1
##   TRUE    0   0   0   1   0   0   0   0
# appears that there are more "false" i.e. no salmon later in the summer when compared to earlier in the summer, once again makes sense based on what we know about out migration of salmon. 
plot(abundance ~ year, data = dat)

dat$abundance[dat$abundance > 0] <- 1
# convert to 0, 1 for modelling

plot(dat$juli_date, dat$abundance)

ggplot(dat, aes(avg_density, abundance, color = year)) + geom_jitter(width = 0.5, height = 0.05) + facet_wrap(~year, ncol = 1)
## Warning: Removed 1 rows containing missing values (geom_point).

ggplot(data = dat, aes(x = reorder(site, -avg_density), y = avg_density, fill = year)) + geom_bar(stat = "identity") + theme(axis.text.x = element_text(angle = 90)) +facet_wrap(~year, ncol = 1)
## Warning: Removed 1 rows containing missing values (position_stack).

# finally lets look at a corrigram of the response variables
library(corrgram)
corrgram(dat[,c(1,6,8,19,21,24,30)], lower.panel=panel.shade, upper.panel=panel.ellipse,
         diag.panel=panel.density)

# No distinguishable pattern aside from the decline in abundance over time

Model fitting and selection

Want to start by fitting a full model including interaction

#Try a fit that includes sea otter density and date, including an interaction
fit1 <- glm(abundance ~ avg_density*juli_date*year, family = binomial(link = logit), data = dat)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(fit1)
## 
## Call:
## glm(formula = abundance ~ avg_density * juli_date * year, family = binomial(link = logit), 
##     data = dat)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.42287  -0.06052   0.00000   0.34396   1.39514  
## 
## Coefficients:
##                                  Estimate Std. Error z value Pr(>|z|)  
## (Intercept)                     1.106e+01  4.868e+00   2.273   0.0230 *
## avg_density                    -8.807e-01  8.876e-01  -0.992   0.3211  
## juli_date                      -6.153e-02  2.669e-02  -2.305   0.0212 *
## year2019                        9.894e+01  4.095e+03   0.024   0.9807  
## avg_density:juli_date           4.925e-03  4.543e-03   1.084   0.2783  
## avg_density:year2019           -1.200e+04  3.155e+05  -0.038   0.9697  
## juli_date:year2019             -6.855e-01  2.939e+01  -0.023   0.9814  
## avg_density:juli_date:year2019  1.052e+02  2.766e+03   0.038   0.9697  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 50.446  on 39  degrees of freedom
## Residual deviance: 17.751  on 32  degrees of freedom
##   (1 observation deleted due to missingness)
## AIC: 33.751
## 
## Number of Fisher Scoring iterations: 25
# try without interaction (not significant)
fit1.red <- glm(abundance ~ avg_density+juli_date+year, family = binomial(link = logit), data = dat)
summary(fit1.red)
## 
## Call:
## glm(formula = abundance ~ avg_density + juli_date + year, family = binomial(link = logit), 
##     data = dat)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2401  -0.5612   0.4685   0.6753   1.4344  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  6.00802    2.56731   2.340   0.0193 *
## avg_density  0.13662    0.10609   1.288   0.1978  
## juli_date   -0.03516    0.01431  -2.456   0.0140 *
## year2019     0.14592    0.99396   0.147   0.8833  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 50.446  on 39  degrees of freedom
## Residual deviance: 36.306  on 36  degrees of freedom
##   (1 observation deleted due to missingness)
## AIC: 44.306
## 
## Number of Fisher Scoring iterations: 5
fit1.time <- glm(abundance ~ juli_date+year, family = binomial(link = logit), data = dat[-41,])
summary(fit1.time)
## 
## Call:
## glm(formula = abundance ~ juli_date + year, family = binomial(link = logit), 
##     data = dat[-41, ])
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3462  -0.7185   0.5535   0.6557   1.6963  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  5.71661    2.45065   2.333   0.0197 *
## juli_date   -0.03115    0.01325  -2.351   0.0187 *
## year2019     0.36545    0.95053   0.384   0.7006  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 50.446  on 39  degrees of freedom
## Residual deviance: 38.849  on 37  degrees of freedom
## AIC: 44.849
## 
## Number of Fisher Scoring iterations: 4
# compare AIC values and Analysis of Deviance
AIC(fit1, fit1.red, fit1.time) # reduced has slightly lower AIC value
##           df      AIC
## fit1       8 33.75077
## fit1.red   4 44.30606
## fit1.time  3 44.84905
anova(fit1, fit1.red, fit1.time, test = "Chisq") # the null hypothesis is rejected indicating
## Analysis of Deviance Table
## 
## Model 1: abundance ~ avg_density * juli_date * year
## Model 2: abundance ~ avg_density + juli_date + year
## Model 3: abundance ~ juli_date + year
##   Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
## 1        32     17.751                          
## 2        36     36.306 -4  -18.555 0.0009609 ***
## 3        37     38.849 -1   -2.543 0.1107849    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# that the true model is the second model that includes avg_density, juli_date, and y ear
anova(fit1, test = "Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: abundance
## 
## Terms added sequentially (first to last)
## 
## 
##                            Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                                          39     50.446              
## avg_density                 1   1.8074        38     48.639 0.1788189    
## juli_date                   1  12.3115        37     36.328 0.0004502 ***
## year                        1   0.0215        36     36.306 0.8834323    
## avg_density:juli_date       1   0.0505        35     36.256 0.8221146    
## avg_density:year            1   3.2129        34     33.043 0.0730593 .  
## juli_date:year              1   2.6951        33     30.348 0.1006574    
## avg_density:juli_date:year  1  12.5967        32     17.751 0.0003864 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Try another fit using seagrass densities and date
# If the seagrass and sea otter have a hypothesized relationship (based on WR work) they should 
# be in included separately. 
fit2.intx <- glm(abundance ~ avg_shoot * avg_flowering * juli_date * year, family= binomial(link = logit), data = dat[-41,])
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(fit2.intx)
## 
## Call:
## glm(formula = abundance ~ avg_shoot * avg_flowering * juli_date * 
##     year, family = binomial(link = logit), data = dat[-41, ])
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.04365  -0.00008   0.00000   0.17662   1.65162  
## 
## Coefficients:
##                                              Estimate Std. Error z value
## (Intercept)                                 5.684e+00  2.435e+01   0.233
## avg_shoot                                   1.941e-02  1.134e-01   0.171
## avg_flowering                              -2.133e+01  2.978e+01  -0.716
## juli_date                                  -2.263e-02  1.631e-01  -0.139
## year2019                                    2.480e+02  3.419e+05   0.001
## avg_shoot:avg_flowering                     9.299e-02  1.472e-01   0.632
## avg_shoot:juli_date                        -1.745e-04  7.905e-04  -0.221
## avg_flowering:juli_date                     1.322e-01  1.889e-01   0.700
## avg_shoot:year2019                         -3.244e+00  1.820e+03  -0.002
## avg_flowering:year2019                      1.130e+03  2.300e+05   0.005
## juli_date:year2019                         -2.588e+00  2.372e+03  -0.001
## avg_shoot:avg_flowering:juli_date          -5.674e-04  9.369e-04  -0.606
## avg_shoot:avg_flowering:year2019           -1.197e-01  5.687e+02   0.000
## avg_shoot:juli_date:year2019                2.926e-02  1.347e+01   0.002
## avg_flowering:juli_date:year2019           -5.877e+00  1.244e+03  -0.005
## avg_shoot:avg_flowering:juli_date:year2019 -4.250e-03  4.314e+00  -0.001
##                                            Pr(>|z|)
## (Intercept)                                   0.815
## avg_shoot                                     0.864
## avg_flowering                                 0.474
## juli_date                                     0.890
## year2019                                      0.999
## avg_shoot:avg_flowering                       0.528
## avg_shoot:juli_date                           0.825
## avg_flowering:juli_date                       0.484
## avg_shoot:year2019                            0.999
## avg_flowering:year2019                        0.996
## juli_date:year2019                            0.999
## avg_shoot:avg_flowering:juli_date             0.545
## avg_shoot:avg_flowering:year2019              1.000
## avg_shoot:juli_date:year2019                  0.998
## avg_flowering:juli_date:year2019              0.996
## avg_shoot:avg_flowering:juli_date:year2019    0.999
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 50.446  on 39  degrees of freedom
## Residual deviance: 15.079  on 24  degrees of freedom
## AIC: 47.079
## 
## Number of Fisher Scoring iterations: 23
# drop the interactions, not significant
fit2 <- glm(abundance ~ avg_shoot + avg_flowering + juli_date + year, family = binomial(link = logit), data = dat[-41,])
summary(fit2)
## 
## Call:
## glm(formula = abundance ~ avg_shoot + avg_flowering + juli_date + 
##     year, family = binomial(link = logit), data = dat[-41, ])
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3329  -0.7028   0.5193   0.6456   1.6576  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)  
## (Intercept)    5.939517   2.477246   2.398   0.0165 *
## avg_shoot     -0.001229   0.001853  -0.663   0.5072  
## avg_flowering  0.052508   0.204545   0.257   0.7974  
## juli_date     -0.031212   0.013522  -2.308   0.0210 *
## year2019       0.625118   1.067014   0.586   0.5580  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 50.446  on 39  degrees of freedom
## Residual deviance: 38.340  on 35  degrees of freedom
## AIC: 48.34
## 
## Number of Fisher Scoring iterations: 4
# looks like julian date is important.... 
# lets drop the other non significant values and see 
fit2.red <- glm(abundance ~ juli_date + year, family = binomial(link = logit), data = dat[-41,])
summary(fit2.red)
## 
## Call:
## glm(formula = abundance ~ juli_date + year, family = binomial(link = logit), 
##     data = dat[-41, ])
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3462  -0.7185   0.5535   0.6557   1.6963  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  5.71661    2.45065   2.333   0.0197 *
## juli_date   -0.03115    0.01325  -2.351   0.0187 *
## year2019     0.36545    0.95053   0.384   0.7006  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 50.446  on 39  degrees of freedom
## Residual deviance: 38.849  on 37  degrees of freedom
## AIC: 44.849
## 
## Number of Fisher Scoring iterations: 4
anova(fit2.intx, fit2, fit2.red, test= "Chisq")
## Analysis of Deviance Table
## 
## Model 1: abundance ~ avg_shoot * avg_flowering * juli_date * year
## Model 2: abundance ~ avg_shoot + avg_flowering + juli_date + year
## Model 3: abundance ~ juli_date + year
##   Resid. Df Resid. Dev  Df Deviance Pr(>Chi)  
## 1        24     15.079                        
## 2        35     38.340 -11 -23.2603  0.01624 *
## 3        37     38.849  -2  -0.5094  0.77513  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AIC(fit2.intx, fit2, fit2.red) # fit 2 reduced only reduces the AIC values by 3... 
##           df      AIC
## fit2.intx 16 47.07934
## fit2       5 48.33961
## fit2.red   3 44.84905
anova(fit2.intx, test = "Chisq")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: abundance
## 
## Terms added sequentially (first to last)
## 
## 
##                                        Df Deviance Resid. Df Resid. Dev
## NULL                                                      39     50.446
## avg_shoot                               1   0.0266        38     50.420
## avg_flowering                           1   0.0862        37     50.334
## juli_date                               1  11.6468        36     38.687
## year                                    1   0.3473        35     38.340
## avg_shoot:avg_flowering                 1   4.2249        34     34.115
## avg_shoot:juli_date                     1   0.0526        33     34.062
## avg_flowering:juli_date                 1   0.0820        32     33.980
## avg_shoot:year                          1   1.4187        31     32.561
## avg_flowering:year                      1   0.2359        30     32.325
## juli_date:year                          1   2.0588        29     30.267
## avg_shoot:avg_flowering:juli_date       1   0.0158        28     30.251
## avg_shoot:avg_flowering:year            1   0.2603        27     29.991
## avg_shoot:juli_date:year                1   2.7433        26     27.247
## avg_flowering:juli_date:year            1  12.1679        25     15.079
## avg_shoot:avg_flowering:juli_date:year  1   0.0000        24     15.079
##                                         Pr(>Chi)    
## NULL                                                
## avg_shoot                              0.8705576    
## avg_flowering                          0.7690317    
## juli_date                              0.0006431 ***
## year                                   0.5556475    
## avg_shoot:avg_flowering                0.0398348 *  
## avg_shoot:juli_date                    0.8185266    
## avg_flowering:juli_date                0.7746074    
## avg_shoot:year                         0.2336206    
## avg_flowering:year                     0.6271743    
## juli_date:year                         0.1513321    
## avg_shoot:avg_flowering:juli_date      0.8998829    
## avg_shoot:avg_flowering:year           0.6099220    
## avg_shoot:juli_date:year               0.0976606 .  
## avg_flowering:juli_date:year           0.0004862 ***
## avg_shoot:avg_flowering:juli_date:year 1.0000000    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# How do we look at these two models with different parameters if the  models aren't nested? 

Visualize the binomial models

visreg(fit1.red, scale = "response", overlapy = T, gg=T)
## [[1]]

## 
## [[2]]

## 
## [[3]]

visreg(fit2, scale = "response", overlay = T, gg = T)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

# diagnostic plots taken with a grain of salt for binomial models
par(mfrow=c(2,2))
plot(fit1.red, which=1:4)

# potential outliers with large influence based on Cook's distance
dat[4,] # Kaguk (high density, seined in july)
## Source: local data frame [1 x 30]
## Groups: <by row>
## 
## # A tibble: 1 x 30
##   site  n_surv1 n_surv2 dens_surv1 dens_surv2 avg_density sd_density year 
##   <fct>   <int>   <int>      <dbl>      <dbl>       <dbl>      <dbl> <fct>
## 1 Kagu~     104      82       15.5       12.2        13.8       2.31 2017 
## # ... with 22 more variables: dissolved_02_mg.l_surface <dbl>,
## #   dissolved_02_percent_surface <dbl>,
## #   specific_conductivity_surface <dbl>, salinity_ppt_surface <dbl>,
## #   temperature_C_surface <dbl>, dissolved_02_mg.l_transect <dbl>,
## #   dissolved_02_percent_transect <dbl>,
## #   specific_conductivity_transect <int>, salinity_ppt_transect <dbl>,
## #   temperature_C_transect <dbl>, avg_shoot <dbl>, sd_shoot <dbl>,
## #   avg_flowering <dbl>, sd_flowering <dbl>, date <fct>, juli_date <int>,
## #   SALCHIN <dbl>, SALCHUM <dbl>, SALCOHO <dbl>, SALPINK <dbl>,
## #   SALSOCK <dbl>, abundance <dbl>
dat[6,] # Guktu (high, density, seined in August)
## Source: local data frame [1 x 30]
## Groups: <by row>
## 
## # A tibble: 1 x 30
##   site  n_surv1 n_surv2 dens_surv1 dens_surv2 avg_density sd_density year 
##   <fct>   <int>   <int>      <dbl>      <dbl>       <dbl>      <dbl> <fct>
## 1 Gukt~     108     132       12.3       15.0        13.7       1.93 2017 
## # ... with 22 more variables: dissolved_02_mg.l_surface <dbl>,
## #   dissolved_02_percent_surface <dbl>,
## #   specific_conductivity_surface <dbl>, salinity_ppt_surface <dbl>,
## #   temperature_C_surface <dbl>, dissolved_02_mg.l_transect <dbl>,
## #   dissolved_02_percent_transect <dbl>,
## #   specific_conductivity_transect <int>, salinity_ppt_transect <dbl>,
## #   temperature_C_transect <dbl>, avg_shoot <dbl>, sd_shoot <dbl>,
## #   avg_flowering <dbl>, sd_flowering <dbl>, date <fct>, juli_date <int>,
## #   SALCHIN <dbl>, SALCHUM <dbl>, SALCOHO <dbl>, SALPINK <dbl>,
## #   SALSOCK <dbl>, abundance <dbl>
# Try and refit the model (fit1.red) without these outliers
fit1.red.up <- glm(abundance ~ avg_density+juli_date+year, family = binomial(link = logit), data = dat[-c(4,6),])
summary(fit1.red.up)
## 
## Call:
## glm(formula = abundance ~ avg_density + juli_date + year, family = binomial(link = logit), 
##     data = dat[-c(4, 6), ])
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2173  -0.4889   0.3298   0.6651   1.4967  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  6.41044    2.81176   2.280   0.0226 *
## avg_density  0.29214    0.23605   1.238   0.2159  
## juli_date   -0.03813    0.01563  -2.439   0.0147 *
## year2019    -0.10476    1.06320  -0.099   0.9215  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 47.398  on 37  degrees of freedom
## Residual deviance: 31.438  on 34  degrees of freedom
##   (1 observation deleted due to missingness)
## AIC: 39.438
## 
## Number of Fisher Scoring iterations: 6
visreg(fit1.red.up, scale = "response", overlap = T, gg=T)
## [[1]]

## 
## [[2]]

## 
## [[3]]

par(mfrow=c(2,2))
plot(fit1.red.up, which=1:4) # does this look better?

Fit the data to a model using abundances

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147

## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147

## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147

## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147

## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147

## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147

## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147

## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147

## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147

## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147

## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147

## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147

## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147

## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147

## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147

## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147

## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147

## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147

## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147

## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147

## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147

## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147

## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147

## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147

## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.693147

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

## 
## Call:
## glm(formula = log(abundance + 1) ~ dens_surv1 + dens_surv2 + 
##     year + juli_date + avg_shoot + avg_flowering + dissolved_02_mg.l_surface + 
##     salinity_ppt_surface + temperature_C_surface + dissolved_02_mg.l_transect + 
##     salinity_ppt_transect + temperature_C_transect, family = poisson, 
##     data = dat)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.14938  -0.47115   0.01652   0.27558   0.72588  
## 
## Coefficients:
##                              Estimate Std. Error z value Pr(>|z|)
## (Intercept)                 1.5483851  7.6239328   0.203    0.839
## dens_surv1                  0.0327237  0.0925717   0.353    0.724
## dens_surv2                  0.0058663  0.0451706   0.130    0.897
## year2019                    0.0099484  0.9017173   0.011    0.991
## juli_date                  -0.0141727  0.0151322  -0.937    0.349
## avg_shoot                  -0.0001681  0.0013338  -0.126    0.900
## avg_flowering               0.0463769  0.1266167   0.366    0.714
## dissolved_02_mg.l_surface  -0.0631941  0.3260231  -0.194    0.846
## salinity_ppt_surface        0.0106196  0.0693024   0.153    0.878
## temperature_C_surface       0.2296211  0.5531599   0.415    0.678
## dissolved_02_mg.l_transect  0.0058499  0.0161033   0.363    0.716
## salinity_ppt_transect       0.0149432  0.2101998   0.071    0.943
## temperature_C_transect     -0.2766764  0.5495297  -0.503    0.615
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 14.614  on 38  degrees of freedom
## Residual deviance: 10.397  on 26  degrees of freedom
##   (2 observations deleted due to missingness)
## AIC: Inf
## 
## Number of Fisher Scoring iterations: 5
## 
## Call:
## glm(formula = log(abundance + 1) ~ avg_density + year + juli_date + 
##     avg_shoot + avg_flowering + dissolved_02_mg.l_surface + salinity_ppt_surface + 
##     temperature_C_surface + dissolved_02_mg.l_transect + salinity_ppt_transect + 
##     temperature_C_transect, family = quasipoisson, data = dat)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.13508  -0.49083   0.01386   0.26941   0.71660  
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                 2.0795717  3.7326437   0.557    0.582  
## avg_density                 0.0271487  0.0277611   0.978    0.337  
## year2019                    0.0317323  0.4611927   0.069    0.946  
## juli_date                  -0.0140974  0.0078301  -1.800    0.083 .
## avg_shoot                  -0.0002092  0.0006897  -0.303    0.764  
## avg_flowering               0.0450778  0.0657033   0.686    0.499  
## dissolved_02_mg.l_surface  -0.0803180  0.1656396  -0.485    0.632  
## salinity_ppt_surface        0.0083765  0.0355478   0.236    0.815  
## temperature_C_surface       0.2163638  0.2824358   0.766    0.450  
## dissolved_02_mg.l_transect  0.0052267  0.0081826   0.639    0.528  
## salinity_ppt_transect       0.0106419  0.1081765   0.098    0.922  
## temperature_C_transect     -0.2738793  0.2832244  -0.967    0.342  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 0.2684921)
## 
##     Null deviance: 14.614  on 38  degrees of freedom
## Residual deviance: 10.443  on 27  degrees of freedom
##   (2 observations deleted due to missingness)
## AIC: NA
## 
## Number of Fisher Scoring iterations: 5
## Warning in sqrt(diag(vc_count)[kx + 1]): NaNs produced